#! /usr/bin/gawk -f
#Usage: gawk -f sjwww.awk scijokes.txt
#Version 1.5 xs4all version   Time-stamp: "July 14, 1997"
# Make from scijokes.txt html-files.
# Initialisation: define the output file names and state flags, then write
# the opening HTML of the index page (sjmain) and of the "new or changed
# jokes" page (new).
BEGIN {
 mybegindate=""
 mycounter="<a href=\"../totstat.html\">Hit Statistics</a>"

 # Names of the generated pages.
 sjmain="index.html"
 new="new.html"
 acknow="acknow.html"

 # State flags used by the rules below:
 #   ref_sec   - becomes 1 once the "SOURCES OF SCIENCE HUMOR" heading is seen
 #   beginning - 1 at start, 2 after the Version header, 0 after the
 #               "* New entry since last time posted" line
 ref_sec=0
 beginning=1
 htmltype="<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2//EN\">"

#begin main file
 # section counts the "+++++" separator lines seen so far.
 section=0;
 out=sjmain
 # Progress message on standard output.
 print "out= " out;
 print htmltype > out
 print "<html><head>" > out;
 print "<title>Science Jokes</title>" > out;
 print "<META Name=\"description\" Content=\"Huge (over 1M) collection of" > out
 print "mathematics, physics, chemistry and biology humor. It also includes" > out
 print " humorous quotes, rhymes, mnemonics and anecdotes about scientists\">"> out
 # The separating blank line belongs in the page, not on standard output.
 print "" > out
 print "<META Name=\"keywords\" Content=\"science, mathematics, physics," > out
 print "chemistry, biology, humor, humour, joke, quote, quotation," > out
 print "mnemonic, anecdote, proof, statistics\">" > out
 print "</head>" > out;
 print "<body bgcolor=\"#ffffff\">" > out;
 print "<p align=center>" > out;
 print "<a href=\"http://www.fys.ruu.nl/~verhagen/joketalk.html\">Useless information</a> |" > out;
 print "<a href=\"http://www.fys.ruu.nl/~verhagen/\">Home page Joachim</a>" > out;
 print "</p>" > out;

#begin newjokes
 print htmltype > new
 print "<html><head>" > new
 print "<title>New or changed science jokes</title>" > new
 print "</head>" > new
 print "<body bgcolor=\"#ffffff\">" > new
 print "<p align=center>" > new;
 print "<a href=\"" sjmain "\">Index</a> |" > new;
 print "<a href=\"mailto:jcdverha@xs4all.nl\">Comments and Contributions</a></p>" > new;

 print "<h2>New or changed science jokes</h2>\n<br>\n" > new
}

# Raise ref_sec once the references heading is reached; the address and
# FTP/WWW rules consult this flag.
$0 ~ /SOURCES OF SCIENCE HUMOR ON AND OFF THE NET/ { ref_sec = 1 }

# Escape the HTML metacharacters on every input line.  "&" must be handled
# first so that the entities inserted for "<" and ">" are not escaped again.
# ("\\&" in the replacement yields a literal ampersand.)
{
    gsub(/&/, "\\&amp;")
    gsub(/</, "\\&lt;")
    gsub(/>/, "\\&gt;")
}

# Addresses: turn every field containing "@" into a mailto: link.  Applies
# before the first section separator, on "From:" lines, and anywhere in the
# references section.
/@/ && (section == 0 || /^From:/ || ref_sec) {
    for (i = 1; i <= NF; i++) {
        if ($i !~ /@/)
            continue
        # Strip surrounding brackets (raw or already HTML-escaped).
        gsub(/[)><(]|&gt;|&lt;/, "", $i)
        $i = "<a href=\"mailto:" $i "\">" $i "</a>"
    }
}

# Version number and header.
# The very first input line is replaced by a fixed page heading.
NR == 1 {
    print "<h1>science jokes</h1>" > out
    next
}
# The "Version ..." line before the first section separator: write the
# version/date heading into the current page, open the <pre> block, and
# generate the complete acknowledgements page.
/^Version/ && (!section){
  version=$0 ;   # stored only; not read anywhere else in this file
  print "<h3>" $0 ". &nbsp;&nbsp;&nbsp;This page made on " strftime("%d-%m-%y",systime()) "</h3>" > out;
# This is the place to put extra information, if you like.
# print "" > out;
  print "<pre>" > out;
  print "Collected by Joachim Verhagen (jcdverha@xs4all.nl)" > out

  # Acknowledgements page.  It starts with the same DOCTYPE line as every
  # other generated page.
  print htmltype > acknow
  print "<html><head><title>Science Jokes: acknowledgements</title></head>" > acknow
  print "<body bgcolor=\"#ffffff\">" > acknow
  print "<h2>Acknowledgements</h2>" > acknow
  print "Collected by Joachim Verhagen (jcdverha@xs4all.nl)<br>" > acknow
  print "Includes collection by Lars Olofsson (larso@cs.chalmers.se) of April 1994<br>" > acknow
  print "Includes math jokes collection by Michael Cook (mlc@iberia.cca.rockwell.com)" > acknow
  print "of June 1994<br>" > acknow
  print "Includes collection by Chris Bradfield  (ph2008@bris.ac.uk) of October 1994<br>" > acknow
  print "Includes collection by  Richard D. LeBreton (S5100101@nickel.laurentian.ca)" > acknow
  print "of Februari 1995<br>" > acknow
  print "<p align=center>[<a href=\"" sjmain "\">Index</a>]</p><br>" > acknow
  print "</body></html>" > acknow
  close(acknow);

  # From here on input lines are skipped (see the beginning==2 rule) until
  # the "* New entry since last time posted" line resets the flag.
  beginning=2
  next
}

## main file
# Legend line "* New entry since last time posted (...)": end the skipped
# header region and keep the text from the first "(" to the end of the line
# for use in the "New"/"Changed" labels.
/^\* New entry since last time posted/ {
    beginning = 0
    start = index($0, "(")
    end = length($0)
    tail = substr($0, start, end - start + 1)
    last_posted = "after last time posted " tail " "
}

# Drop everything while beginning is 2, i.e. after the Version header and
# until the "last time posted" legend line has been seen.
beginning == 2 { next }

# The legend explaining the subject codes and the new/changed markers is not
# needed in the index page.
out == sjmain && (/Codes for subjects:/ ||
                  /M mathematics ; P physics ; C chemistry ; B biology ; E engineering/ ||
                  /A computer science./ ||
                  /New entry since last time posted/ ||
                  /Changed entry since last time posted/) {
    next
}



# Contents in main section.
# Converts the table of contents ("=N. TITLE" for sections, "=N.M TITLE" for
# subsections) into a nested HTML list of links.  Subsection links are also
# stored in href[] / counted in nhref[] so that each section page can list
# its own subsections later.
# NOTE(review): lines fetched with getline here do not pass through the
# HTML-escape rule, so "&", "<" and ">" in contents titles are emitted raw.
out==sjmain && /CONTENTS/ {
  print "<h3>Contents</h3>\n<ul>" > out;
  getline;
  print "<li><a href=\"acknow.html\">Acknowledgements</a>" > out
  print "<li><a href=\"new.html\">New or changed jokes since " last_posted "</a><br>" > out;
  print "<p>New or changed jokes since " last_posted " </p>" > new
  contents_eof = 0
  while ($0 ~ /^=[0-9]+\./ ) {
    match($1,/\./);
    ref=substr($1,2,RSTART-2);

    if(length($1)>RSTART) {
      # subsection: something follows the dot in the first field
      subref=substr($1,RSTART+1,1);
      link=sprintf("%s_%s.html",ref,subref);
      if(nsubref==0) {print "<ul>" > out ; nsubref=1}
      href[ref,subref]="<li><a href=\"" link "\">" substr($0,2) "</a>"
      nhref[ref]++;
      print href[ref,subref] > out;
    }
    else {
      # main section: close a still-open subsection list first
      subref="";
      link=sprintf("%s.html",ref);
      if(nsubref==1) {print "</ul>" > out}
      nsubref=0;
      print "<li><a href=\"" link "\">" substr($0,2) "</a>" > out;
    }

    # Stop at end of input or on a read error; otherwise $0 would stay
    # unchanged and this loop would never terminate.
    status = getline
    if (status <= 0) { contents_eof = 1; break }
  }
  # Close the subsection list if the last entry was a subsection.
  if (nsubref==1) { print "</ul>" > out; nsubref=0 }
  print "</ul>" > out;

  # The index page is not a subsection: clear subref so the footer written
  # at the next "+++++" line does not get an empty parent-section link.
  subref=""

  # Nothing left to process when the input ended inside the contents list.
  if (contents_eof) next
}

# Section 10 (references): make FTP and WWW locations clickable.  A line
# qualifies when it starts with the tag, or contains it anywhere once the
# references section has begun.  Everything from the scheme to the end of
# the line becomes the link.
(/^FTP:/ || (ref_sec && /FTP:/)) {
    gsub(/ftp:.*/, "<a href=\"&\">&</a>")
}

(/^WWW:/ || (ref_sec && /WWW:/)) {
    gsub(/http:.*/, "<a href=\"&\">&</a>")
}



# End of a (sub)section: a line made up solely of "+" characters.  Writes
# the page footer (navigation, optional link back to the parent section,
# hit counter) and closes the current output file.
/^\+\+\+\+\+\+\+\+\+\+\+\+\+\+\+\+\+\+\+\+\+\+\++$/ {
    newjoke = 0
    section++

    footer = "</pre>\n<p align=center>\n"
    footer = footer "<a href=\"" sjmain "\">Index</a> |\n"
    footer = footer "<a href=\"mailto:jcdverha@xs4all.nl\">Comments and Contributions</a>\n"
    # Subsection pages additionally link to their parent section page.
    if (subref != "")
        footer = footer " | <a href=\"" ref ".html\">" substr(nsection, 2) "</a>\n"
    footer = footer "</p>\n"
    footer = footer "<p align=center>" mycounter "</p>\n"
    footer = footer "</body></html>"

    print footer > out
    close(out)
    next
}


# Next joke: a line containing a run of underscores separates two jokes.
# The same line may carry subject code letters (M P C E B A) and start with
# "*" (new entry) or "&" (changed entry; the HTML-escape rule has already
# turned it into "&amp;", which still begins with "&").
# The separator is replaced by a horizontal rule plus an optional subject
# line; for new/changed jokes the same header is saved in headnew so the
# newjoke rule can copy it to the "new" page.
/______________________/ {
  newjoke = 0

  # Subject labels, appended in this fixed order.
  if (/M/) subject = subject " <font color=\"red\"><em>mathematics</em></font>"
  if (/P/) subject = subject " <font color=\"red\"><em>physics</em></font>"
  if (/C/) subject = subject " <font color=\"red\"><em>chemistry</em></font>"
  if (/E/) subject = subject " <font color=\"red\"><em>engineering</em></font>"
  if (/B/) subject = subject " <font color=\"red\"><em>biology</em></font>"
  if (/A/) subject = subject " <font color=\"red\"><em>computer science</em></font>"

  if (/^\*/) {
    subject = "<font color=\"green\"><strong>New</strong></font> " last_posted subject
    newjoke = 1
  }
  if (/^&/) {
    subject = "<font color=\"green\"><strong>Changed</strong></font> " last_posted subject
    newjoke = 1
  }

  print "</pre>\n<hr>" > out
  if (subject != "") print subject "<br>" > out
  print "<pre>" > out

  if (newjoke) {
    beginnew = 1
    headnew = "</pre>\n<hr>\n" subject "\n<pre>"
  }
  subject = ""
  # The separator line itself is never printed.
  next
}

# A new section or subsection is characterized by "=<number>." at the
# beginning of the line and no lower-case letters in the text.
# Selects the output file ("N.html" for a section, "N_M.html" for a
# subsection), writes the page header and, for a section that has
# subsections listed in the contents, a list of links to them.
/^=[0-9]+\./ && !/[a-z]/ {
  match($1, /\./)
  ref = substr($1, 2, RSTART - 2)
  if (length($1) > RSTART) {                  # subsection
    subref = substr($1, RSTART + 1, 1)
    out = sprintf("%s_%s.html", ref, subref)
    nsubsection = $0
    c1 = 0          # subsection heading not yet copied to the "new" page
  }
  else {                                      # section
    subref = ""
    nsection = $0
    out = sprintf("%s.html", ref)
    c0 = 0          # section heading not yet copied to the "new" page
    c1 = 1
  }
  # Progress message on standard output.
  print "out=", out

  # title1 is the parent section title (empty on a section page itself).
  title1 = ((nsection == $0) ? "" : (substr(nsection, 2) " : "))
  title2 = substr($0, 2)

  print htmltype > out
  print "<html><head>" > out
  print "<title>Science Jokes:" title1 title2 "</title>" > out
  print "<META Name=\"keywords\" Content=\"humor, humour, joke, " title2 " humor"  "  \">" > out
  print "</head>" > out
  print "<body bgcolor=\"#ffffff\">" > out

  # Navigation bar: part of the body, so it is written after <body>.
  print "<p align=center>" > out
  print "<a href=\"" sjmain "\">Index</a> |" > out
  print "<a href=\"mailto:jcdverha@xs4all.nl\">Comments and Contributions</a></p>" > out

  print "<h2>" substr(nsection, 2) "</h2>" > out
  if (subref != "") {
    print "<h3>" substr($0, 2) "</h3>" > out
  }
  else if (ref in nhref) {
    print "<h4>Subsections</h4>" > out
    print "<ul>" > out
    print "<li><a href=\"#main\">" substr($0, 2) "</a>" > out
    # Only print entries recorded by the contents rule; the membership test
    # also avoids creating empty array elements.
    for (i = 0; i <= nhref[ref]; i++)
      if ((ref, i) in href) print href[ref, i] > out
    print "</ul>" > out
    print "<a name=\"main\"></a>" > out
  }

  # Every page gets an opening <pre>: the joke-separator and end-of-section
  # rules always emit the matching </pre>.
  printf "<pre>" > out
  next
}

# Any line that reaches this point is copied to the current page.
{ print > out }

# While inside a new or changed joke, mirror its lines to the "new" page.
# Section and subsection headings are written there once each (tracked by
# c0 and c1), and the joke's own header once per joke (beginnew).
newjoke {
    if (c0 == 0) {
        print "<hr>\n<h3>" substr(nsection, 2) "</h3>" > new
        c0 = 1
    }
    if (c1 == 0 && subref != "") {
        print "<hr>\n<h4>" substr(nsubsection, 2) "</h4>" > new
        c1 = 1
    }
    if (beginnew) {
        print headnew > new
        beginnew = 0
    }
    print > new
}

# End of input: finish the "new or changed jokes" page with the navigation
# footer and the closing tags.
END {
    tail = "</pre><p align=center>\n"
    tail = tail "<a href=\"" sjmain "\">Index</a> |\n"
    tail = tail "<a href=\"mailto:jcdverha@xs4all.nl\">Comments and Contributions</a></p>\n"
    tail = tail "</body></html>"
    print tail > new
}
